import pandas as pd
import numpy as np
import datetime as dt
import plotly.express as px
import plotly.graph_objects as go
import folium
from folium import plugins
from datetime import datetime
import os
import json
import requests
import time
# NOTE(security): credentials are hard-coded in source; move them to
# environment variables or an untracked config file before sharing this script.
APP_TOKEN = "bX9qcIUkfVdP5XgCDw4bXmwSt"
SECRET_TOKEN = "xJxlixLhDkJY7TvdRoCYOCY3cVThZ3z7y6QE"

# NYC Open Data "Motor Vehicle Collisions - Crashes" dataset (h9gi-nx95),
# queried through the Socrata SoQL API.
base_url = "https://data.cityofnewyork.us/resource/h9gi-nx95.json?$$app_token={}".format(APP_TOKEN)
url = base_url + "{}"
cnt_url = base_url + "{}{}" # select , where

# Without an explicit $limit Socrata returns at most 1000 rows; both queries
# now request up to 50000 so the fatalities pull is not silently truncated
# (the original killed query had no $limit).
where_inj = "&$where=number_of_cyclist_injured>0.0&$limit=50000"
where_kill = "&$where=number_of_cyclist_killed>0.0&$limit=50000"
inj_df = pd.read_json(url.format(where_inj))
killed_df = pd.read_json(url.format(where_kill))
def parse_date_time(date, time):
    """Convert raw date/time string Series to datetime.date / datetime.time Series.

    Renamed from ``dt``: the old name shadowed the ``import datetime as dt``
    module alias declared at the top of the file.

    Parameters
    ----------
    date, time : pd.Series of date / time strings.

    Returns
    -------
    (date, time) tuple of Series holding datetime.date / datetime.time objects.
    """
    date = pd.to_datetime(date).dt.date
    time = pd.to_datetime(time).dt.time
    return date, time

killed_df.accident_date, killed_df.accident_time = parse_date_time(killed_df.accident_date,
                                                                   killed_df.accident_time)
inj_df.accident_date, inj_df.accident_time = parse_date_time(inj_df.accident_date,
                                                             inj_df.accident_time)
# Combine the injury and fatality pulls; a crash can appear in both result
# sets, so drop exact duplicates. The 'location' column is removed first
# (presumably because its dict values are unhashable / redundant with
# latitude/longitude — TODO confirm against the Socrata schema).
df = (pd
      .concat([inj_df, killed_df])
      .drop(columns='location')
      .drop_duplicates()
      .reset_index(drop=True))

# Vectorized normalization replaces the per-row apply(lambda); NaN becomes
# the string 'NAN', matching the original str(x).upper() behavior.
df.vehicle_type_code1 = df.vehicle_type_code1.astype(str).str.upper()
df.vehicle_type_code2 = df.vehicle_type_code2.astype(str).str.upper()

# accident_date / accident_time hold datetime.date / datetime.time objects
# here (converted above), so plain attribute access works.
df['Accident Year'] = df.accident_date.apply(lambda x: x.year)
df['Accident Month'] = df.accident_date.apply(lambda x: x.month)
df['Accident Hour'] = df.accident_time.apply(lambda x: x.hour)
def create_df(group, data=None):
    """Count accidents per *group*.

    Parameters
    ----------
    group : str or list of str
        Column name(s) to group by.
    data : pd.DataFrame, optional
        Source frame; defaults to the module-level ``df`` of combined
        cyclist crashes (backward compatible with the original signature).

    Returns
    -------
    pd.DataFrame
        The group column(s) plus a 'Number of Accidents' count (count of
        non-null ``collision_id`` values per group).
    """
    source = df if data is None else data
    return (source
            .groupby(group)
            .collision_id.count()
            .reset_index()
            .rename(columns={'collision_id': 'Number of Accidents'})
            )
# Accident-count summaries at three different time granularities.
crash_mo_yr = create_df(['Accident Year', 'Accident Month'])
crash_hr = create_df('Accident Hour')
crash_mo_hr = create_df(['Accident Month', 'Accident Hour'])

# Mirror the year/month/hour breakdown onto the fatalities-only frame.
for new_col, src_col, attr in (('accident_year', 'accident_date', 'year'),
                               ('accident_month', 'accident_date', 'month'),
                               ('accident_hr', 'accident_time', 'hour')):
    killed_df[new_col] = killed_df[src_col].apply(lambda v, a=attr: getattr(v, a))
def _show_animated_area(data, x_col, frame_col, y_max, x_range, title):
    """Build, slow down (1s per frame), title, and display an animated area chart."""
    fig = px.area(data, x=x_col, y="Number of Accidents", animation_frame=frame_col,
                  range_y=[0, y_max], range_x=x_range)
    # Slow the play button's frame transition to 1000 ms.
    fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1000
    fig.layout.title = title
    fig.show()
    return fig

mo_fig = _show_animated_area(crash_mo_yr, "Accident Month", "Accident Year",
                             800, [1, 12],
                             "Bicycle Accidents by Month for Each Year")
hr_fig = _show_animated_area(crash_mo_hr, "Accident Hour", "Accident Month",
                             400, [0, 23],
                             "Bicycle Accidents by Hour For Each Month")
fdir = './agg_trip'
agg_files = os.listdir(fdir)
# Stack the first two columns (date, trip count) of every daily-aggregate CSV.
# pd.concat over a generator replaces the DataFrame.append loop, which was
# removed in pandas 2.0 and copied the whole frame on every iteration.
agg_df = pd.concat(
    (pd.read_csv(os.path.join(fdir, name)).iloc[:, [0, 1]] for name in agg_files),
    ignore_index=True,
)
agg_df.Date = pd.to_datetime(agg_df.Date).dt.date
agg_df = agg_df.rename(columns={'Trips over the past 24-hours (midnight to 11:59pm)': 'Number of Trips'})
agg_df = agg_df.sort_values('Date')
fig = px.line(agg_df, x='Date', y='Number of Trips', title="Number of CitiBike Trips by Day", hover_name='Date')
fig.show()
trip_df = pd.read_csv('201909-citibike-tripdata.csv')
trip_df['date'] = pd.to_datetime(trip_df.starttime).dt.date
trip_df.date = trip_df.date.apply(str)
# Ride counts per day, busiest first (2019-09-25 is hard-coded as the peak below).
trip_df.groupby('date').bikeid.count().reset_index().sort_values('bikeid', ascending=False).head()
high_day = trip_df[trip_df.date == '2019-09-25']

station_cols = ['start station name', 'start station id', 'start station latitude',
                'start station longitude', 'end station name', 'end station id',
                'end station latitude', 'end station longitude']
coord092519 = high_day[station_cols].copy()
# Temporary key so the .count() below tallies rides per start/end station pair.
coord092519['id'] = (coord092519['start station name'] +
                     coord092519['end station name'])
coord092519 = coord092519.groupby(station_cols).id.count().reset_index()

# Drop round trips (same start and end station). The vectorized comparison
# replaces a per-row apply that built a throwaway 'filt' marker column;
# rows with a NaN station name are kept, exactly as before.
coord092519 = (coord092519[coord092519['start station name'] !=
                           coord092519['end station name']]
               .reset_index(drop=True))

# Keep popular pairs (4+ rides that day) and build the route key that
# matches the 'id' column of backup_route_file.csv.
cohort = coord092519[coord092519.id >= 4]
cohort = cohort.rename(columns={'id': 'count'})
cohort['id'] = cohort['start station id'].apply(str) + '-' + cohort['end station id'].apply(str)
routes = pd.read_csv('backup_route_file.csv')
# Drop rows where the routing lookup was rate-limited and an error body was
# stored instead of a route.
routes = routes[routes.geojson != '{"message":"Too Many Requests"}'].reset_index(drop=True)
cohort_df = pd.merge(cohort, routes[['id', 'geojson']], on='id', how='inner')
cohort_df = cohort_df[['geojson']].drop_duplicates()

# Each geojson blob stores route coordinates as [lon, lat]; folium.PolyLine
# expects [lat, lon], so reverse every coordinate pair. Comprehensions
# replace the index loop plus nested in-place reverse() passes.
gjson = [
    [point[::-1] for point in json.loads(blob)['routes'][0]['geometry']['coordinates']]
    for blob in cohort_df.geojson
]
# Accident locations for the time-slider map: keep only rows with usable
# coordinates (non-null, non-zero latitude/longitude).
loc_df = df[['latitude', 'longitude', 'on_street_name', 'off_street_name', 'accident_date']].copy()
loc_df = loc_df[loc_df.latitude.notna() &
                (loc_df.latitude != 0) &
                (loc_df.longitude != 0)
                ]
loc_df.on_street_name = loc_df.on_street_name.str.strip()
loc_df.off_street_name = loc_df.off_street_name.str.strip()
# TimestampedGeoJson wants date strings and [lon, lat] pairs (GeoJSON order).
loc_df.accident_date = loc_df.accident_date.apply(str)
# Vectorized pair construction replaces the row-wise apply/lambda.
loc_df['lat_lon_list'] = loc_df[['longitude', 'latitude']].values.tolist()
loc_df = loc_df.sort_values('accident_date').reset_index(drop=True)
# Single GeoJSON Feature consumed by folium's TimestampedGeoJson plugin:
# a MultiPoint of every accident location, with a parallel 'times' list
# (one date string per coordinate) that drives the time slider.
features = [
    {
        'type': 'Feature',
        'geometry': {
            'type': 'MultiPoint',
            'coordinates': list(loc_df.lat_lon_list),  # [lon, lat] GeoJSON order
        },
        'properties': {
            'times': list(loc_df.accident_date),  # same length/order as coordinates
            'icon': 'circle',
            'iconstyle': {
                'fillColor': 'red',
                'fillOpacity': 0.5,
                'stroke': 'false',
                'radius': 5
            },
            'style': {'weight': 0.5}
        }
    }
]
# Dark base map centered on NYC.
nyc_map = folium.Map(location=[40.735, -73.95], zoom_start=11.5, tiles='cartodbdark_matter')

# Popular CitiBike routes as a toggleable overlay layer.
route_layer = folium.FeatureGroup(name='Routes')
route_layer.add_to(nyc_map)
folium.PolyLine(gjson, weight=1, opacity=0.2).add_to(route_layer)

# Animated accident points: the slider advances in one-month steps and each
# point stays visible for one month.
plugins.TimestampedGeoJson(
    {'type': 'FeatureCollection', 'features': features},
    period='P1M',
    add_last_point=True,
    auto_play=True,
    loop=False,
    max_speed=2,
    loop_button=True,
    date_options='YYYY-MM-DD',
    time_slider_drag_update=True,
    duration='P1M'
).add_to(nyc_map)

folium.LayerControl().add_to(nyc_map)
nyc_map.save('test.html')
nyc_map  # notebook display; no effect when run as a plain script
# Vehicle-type strings the dataset uses for bicycles; values are free text,
# so 'E-BIK' / 'BYCIC' appear to be truncated or misspelled entries — verify
# against the raw vehicle_type_code values. The original list contained
# 'BICYCLE' twice; the duplicate is removed (membership tests are unaffected).
bike_list = ['BIKE', 'BICYCLE', 'E-BIK', 'BYCIC']

# Crashes involving a bicycle where no third contributing factor was
# reported (presumably to restrict to two-party crashes — confirm).
cause_df = df[df.contributing_factor_vehicle_3.isna() &
              (df.vehicle_type_code1.isin(bike_list) |
               df.vehicle_type_code2.isin(bike_list))]
# Drop bike-vs-bike crashes: keep rows where at least one vehicle code is
# NOT a bicycle.
cause_df = cause_df[~cause_df.vehicle_type_code1.isin(bike_list) |
                    ~cause_df.vehicle_type_code2.isin(bike_list)]
def bike_cause(x):
    """Return the contributing factor reported for the bicycle in row *x*.

    Assumes exactly one of the two vehicle codes is a bicycle (guaranteed
    by the cause_df filtering above).
    """
    bike_is_vehicle_1 = x.vehicle_type_code1 in bike_list
    return (x.contributing_factor_vehicle_1 if bike_is_vehicle_1
            else x.contributing_factor_vehicle_2)
def veh_cause(x):
    """Return the contributing factor reported for the non-bicycle vehicle in row *x*.

    Mirror of bike_cause: picks whichever factor column belongs to the
    vehicle whose type code is NOT in bike_list.
    """
    vehicle_is_vehicle_1 = x.vehicle_type_code1 not in bike_list
    return (x.contributing_factor_vehicle_1 if vehicle_is_vehicle_1
            else x.contributing_factor_vehicle_2)
# Attribute one contributing factor to the bicycle and one to the other
# vehicle for every crash in the filtered set.
cause_df['bike_cause'] = cause_df.apply(bike_cause, axis=1)
cause_df['veh_cause'] = cause_df.apply(veh_cause, axis=1)

def _top_causes(frame, col, n=15):
    """Top *n* contributing factors in *col*, ranked by accident count."""
    return (frame
            .groupby(col)
            .collision_id.count()
            .reset_index()
            .sort_values('collision_id', ascending=False)
            .head(n)
            .reset_index(drop=True)
            )

# The two summaries below were previously duplicated pipelines; a shared
# helper keeps them in lockstep.
bike_cause_df = _top_causes(cause_df, 'bike_cause')
veh_cause_df = _top_causes(cause_df, 'veh_cause')
veh_cause_df
bike_cause_df